KAC Data Analysis

Author

Daiil Jun

Project goals

  1. understand the current child care landscape in Kansas, through analysis of data provided

  2. assess the child care landscapes of states comparable to Kansas through student research and data collection of comparable datasets to Kansas

  3. draw conclusions based on the child care landscapes of Kansas and selected states, predict an outcome for Kansas through 1-year, 3-year, and 5-year trend analysis.

Data Exploration - Kansas State Level

Kansas care facility trend from 2017 to 2023

First, Kansas data is broken up into different types of child care licensing. It would be useful to visualize the difference between the types of child care centers, and where the individual centers are throughout the state.

Code
ks_care_facility <- read_rds("./Data/cleaned_ks/care_facility.rds")

ks_care_facility <- ks_care_facility |> 
  rename(
    program = `Program Type`,
    n_facility = `Total Facilities`,
    n_capacity = `Total Capacity`
  )

Year trend visualization - Number of facility and capacity

Code
# n of facility trend
ggplot(ks_care_facility, aes(x = year, y = n_facility)) +
  geom_col() +
  facet_wrap( ~ program, scales = "free")

Code
# n of capacity trend
ggplot(ks_care_facility, aes(x = year, y = n_capacity)) + 
  geom_col() +
  facet_wrap( ~ program, scales = "free")
Warning: Removed 4 rows containing missing values or values outside the scale range
(`geom_col()`).

Year program trend visualization - N of facility and capacity

Code
#facility
ggplot(ks_care_facility, aes(x = (n_facility), y = program)) +
  geom_col() + 
  geom_text(
    aes(label = n_facility, x = n_facility + 10)
  ) + 
  facet_wrap( ~ year)

Code
#capacity
ggplot(ks_care_facility, aes(x = (n_capacity), y = program)) +
  geom_col() + 
  geom_text(
    aes(label = n_capacity, x = n_capacity + 10)
  ) + 
  facet_wrap( ~ year)

Kansas care facility most recent status

data import

Code
ks_facility <- read_rds("./Data/cleaned_ks/ks_facility.rds")

desired capacity vs licensed capacity - Year of 2024

Code
ggplot(ks_facility, aes(x = (n_desired_cap), y = care_facility)) +
  geom_col(aes(fill = "N of desired capacity"),
           width = 0.25,
           position = position_nudge(y = -0.25)) +  
  geom_col(aes(x = n_licensed_cap, fill = "N of licensed capacity"),
           width = 0.25,
           position = position_nudge(y = 0)) + 
  geom_col(aes(x = n_child_enrolled, fill = "N of enrolled children"),
           width = 0.25,
           position = position_nudge(y = 0.25)) + 
  labs(
    title = "KS 2024 status of childcare facility",
    x = " ",
    y = "Type of childcare facility",
    fill = " "
  )

Kansas Family Child Care Rate

Code
ks_family_rate <- read_rds("./Data/cleaned_ks/ks_family_rate.rds")
ks_census <- read_rds("./Data/cleaned_ks/ks_census_cleaned.rds")
ks_census <- ks_census |> 
  mutate(
    median_h_income_monthly = median_h_income / 12
  )

#montly rate

ggplot(ks_family_rate, aes(x = avg_part_monthly, y = age_group)) + 
  geom_col(aes(fill = "Average monthly part-time rate"),
           width = 0.3, 
           position = position_nudge(y = -0.25)) + 
  geom_col(aes(x = avg_full_monthly, fill = "Average monthly full-time rate"),
           width = 0.3,
           position = position_nudge(y =  0)) + 
  geom_text(
    aes(
      label = paste0(round(avg_full_monthly/ks_census$median_h_income_monthly, digit = 2)*100, "%", "of monthly income"),
      x = 200),
      size = 2.5
    ) +
  labs(
    title = "KS 2024 Family Child Care Rate",
    y = "Age Group",
    x = "Rate",
    fill = " "
  )

Kansas Child Care Center Rate

Code
ks_child_rate <- read_rds("./Data/cleaned_ks/ks_child_rate.rds")

#montly rate

ggplot(ks_child_rate, aes(x = avg_part_monthly, y = age_group)) + 
  geom_col(aes(fill = "Average monthly part-time rate"),
           width = 0.3, 
           position = position_nudge(y = -0.25)) + 
  geom_col(aes(x = avg_full_monthly, fill = "Average monthly full-time rate"),
           width = 0.3,
           position = position_nudge(y =  0)) + 
  geom_text(
    aes(
      label = paste0(round(avg_full_monthly/ks_census$median_h_income_monthly, digit = 2)*100, "%", "of monthly income"),
      x = 300
      ),
      size = 2.5,
  ) +
  labs(
    title = "KS 2024 Child Care Center Rate",
    y = "Age Group",
    x = "Rate",
    fill = " "
  )

Data Exploration - KS County Level

Kansas care facility most recent status - County level

data import - 2024 year

Code
county_facility <- read_rds("./Data/cleaned_ks/county_facility.rds")

Desired capacity vs licensed capacity - Year of 2024

Code
county_facility <- county_facility |> 
  mutate(
    n_disc_cap = n_licensed_cap - n_desired_cap
  )

county_facility |> 
  summarise(
    mean = mean(n_disc_cap),
    sd = sd(n_disc_cap),
    .by = c(county, care_facility)
  )
# A tibble: 525 × 4
   county   care_facility          mean    sd
   <fct>    <fct>                 <dbl> <dbl>
 1 ALLEN    child care centers       41    NA
 2 ALLEN    licesned family/group    34    NA
 3 ALLEN    head start                0    NA
 4 ALLEN    preschool                 0    NA
 5 ALLEN    school age program        0    NA
 6 ANDERSON child care centers        0    NA
 7 ANDERSON licesned family/group    15    NA
 8 ANDERSON head start                2    NA
 9 ANDERSON preschool                 0    NA
10 ANDERSON school age program        0    NA
# ℹ 515 more rows
Code
county_facility |> 
  filter(
    n_disc_cap < 0
  ) 
# A tibble: 1 × 8
  county    care_facility n_child_enrolled n_desired_cap n_licensed_cap n_dcf
  <fct>     <fct>                    <dbl>         <dbl>          <dbl> <dbl>
1 MCPHERSON head start                 142           159            156    10
# ℹ 2 more variables: n_food_program <dbl>, n_disc_cap <dbl>

Data Exploration - Visualization - Tredn

Trend - Child Care Centers

Code
ks_t_child <- read_xlsx("./Data/datacenter/KS Childcare Centers.xlsx")

ks_t_child <- ks_t_child |> 
  mutate(
    across(
      .cols = c(`2017`:`2022`),
      .fns = ~ as.numeric(.)
    ) 
  ) |> 
  pivot_longer(
    cols = -c(Location, `Data Type`),
    names_to = "year",
    values_to = "n_child_facility"
  ) |> 
  mutate(
    year = as.numeric(year)
  )

ggplot(ks_t_child, aes(x = year, y = n_child_facility)) + 
  geom_col() +
  facet_wrap_paginate( ~ Location, scales = "free", ncol = 5, nrow = 3, page = 1)

Projection - ARIMA

Child Care Centers - Kansas

Code
pro_child <- ks_t_child |> 
  summarise(
    n_child_facility = sum(n_child_facility, na.rm = TRUE),
    .by = "year"
  )

ts_data <- ts(pro_child$n_child_facility, start = 2017, frequency = 1)

fit_child <- auto.arima(ts_data)
summary(fit_child)
Series: ts_data 
ARIMA(0,1,0) with drift 

Coefficients:
       drift
      10.400
s.e.   1.992

sigma^2 = 24.89:  log likelihood = -14.56
AIC=33.13   AICc=39.13   BIC=32.35

Training set error measures:
                     ME     RMSE    MAE         MPE      MAPE      MASE
Training set 0.09893328 4.073335 2.9656 0.007204792 0.4625464 0.2851538
                   ACF1
Training set -0.1583016
Code
future_child <- forecast(fit_child, h = 5)
print(future_child)
     Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
2023          666.4 660.0066 672.7934 656.6221 676.1779
2024          676.8 667.7584 685.8416 662.9720 690.6280
2025          687.2 676.1263 698.2737 670.2642 704.1358
2026          697.6 684.8132 710.3868 678.0443 717.1557
2027          708.0 693.7039 722.2961 686.1360 729.8640
Code
plot(future_child, main = "ARIMA Forecast of Facilities", xlab = "Year", ylab = "Number of Facilities")

Child Care Centers - County Level - KS

Code
n_county <- length(unique(ks_t_child$Location))
#105 counties 

ks_t_child_county <- ks_t_child |> 
  filter(
    n_child_facility > 0, 
    .by = Location
  ) |> 
  filter(
    n() == 6,
    .by = Location
  )
n_county_ts <- length(unique(ks_t_child_county$Location))
county_ts <- unique(ks_t_child_county$Location)
#59 counties
#46 counties have no child care centers in one of the years from 2017 to 2022
#These counties cannot fit projection 

for(i in seq(n_county_ts)){
  pro_child_county <- ks_t_child_county |> 
    filter(Location == county_ts[i])
  
  ts_data <- ts(pro_child_county$n_child_facility, start = 2017, frequency = 1)
  
  fit_child_county <- auto.arima(ts_data)
  
  future_child_county <- forecast(fit_child_county, h = 5)
  
  plot(
    future_child_county, 
    main = paste0("ARIMA Forecast of Childcare Facilities - ", county_ts[i]),
    xlab = "Year",
    ylab = "Number of Facilities")
}

Trend - Family Care Centers

Code
ks_t_family <- read_xlsx("./Data/datacenter/KS Family care centers.xlsx")

ks_t_family <- ks_t_family |> 
  mutate(
    across(
      .cols = `2017`:`2022`,
      .fns = ~ as.numeric(.)
    ) 
  ) |> 
  pivot_longer(
    cols = -c(Location, `Data Type`),
    names_to = "year",
    values_to = "n_family_facility"
  )

ggplot(ks_t_family, aes(x = year, y = n_family_facility)) + 
  geom_col() +
  facet_wrap_paginate( ~ Location, scales = "free", ncol = 5, nrow = 3, page = 2)

Projection - ARIMA

Family care centers - KS

Code
pro_family <- ks_t_family |> 
  summarise(
    n_family_facility = sum(n_family_facility, na.rm = TRUE),
    .by = "year"
  )

ts_data <- ts(pro_family$n_family_facility, start = 2017, frequency = 1)

fit_family <- auto.arima(ts_data)
summary(fit_family)
Series: ts_data 
ARIMA(0,1,0) 

sigma^2 = 67758:  log likelihood = -34.9
AIC=71.81   AICc=73.14   BIC=71.42

Training set error measures:
                    ME    RMSE      MAE       MPE    MAPE      MASE       ACF1
Training set -170.1257 237.624 176.8743 -4.796147 4.98014 0.8366809 -0.2919201
Code
future_family <- forecast(fit_family, h = 5)
print(future_family)
     Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
2023           3221 2887.407 3554.593 2710.813 3731.187
2024           3221 2749.228 3692.772 2499.487 3942.513
2025           3221 2643.200 3798.800 2337.331 4104.669
2026           3221 2553.814 3888.186 2200.627 4241.373
2027           3221 2475.063 3966.937 2080.188 4361.812
Code
plot(future_family, main = "ARIMA Forecast of Facilities", xlab = "Year", ylab = "Number of Family Facilities")

Code
n_county <- length(unique(ks_t_family$Location))
#105 counties 

ks_t_family_county <- ks_t_family |> 
  filter(
    n_family_facility > 0, 
    .by = Location
  ) |> 
  filter(
    n() == 6,
    .by = Location
  )

n_county_ts <- length(unique(ks_t_family_county$Location))
county_ts <- unique(ks_t_family_county$Location)
#all counties have more than one family care centers

for(i in seq(n_county_ts)){
  pro_family_county <- ks_t_family_county |> 
    filter(Location == county_ts[i])
  
  ts_data <- ts(pro_family_county$n_family_facility, start = 2017, frequency = 1)
  
  fit_family_county <- auto.arima(ts_data)
  
  future_family_county <- forecast(fit_family_county, h = 5)
  
  plot(
    future_family_county, 
    main = paste0("ARIMA Forecast of Family care Facilities - ", county_ts[i]),
    xlab = "Year",
    ylab = "Number of Facilities")
}

Trend - Pre-kindergarten

Pre-Kindergarten and 4-year-old At Risk Programs is the percent of public elementary schools that offer pre-kindergarten or 4-year-old At-Risk program five days a week. Public non-elementary buildings with such programs are also included in the numerator and the denominator. Data are provided by the Kansas State Department of Education. The current rate represents academic year 2022-23.

Code
ks_t_pre_k <- read_xlsx("./Data/datacenter/KS Pre-kindergarten.xlsx")

ks_t_pre_k <- ks_t_pre_k |> 
  rename(
    year = TimeFrame,
    rate_pre_k = Data
  )


ggplot(ks_t_pre_k |> filter(Location == "Kansas"), aes(x = year, y = rate_pre_k)) + 
  geom_col() 

Trend - Head Start

Head Start is the number of Head Start enrollment slots available per 100 children 3–4 years of age living in families with income below the U.S. poverty threshold.

Code
ks_t_head <- read_xlsx("./Data/datacenter/KS Head Start.xlsx")

ks_t_head <- ks_t_head |> 
  rename(
    year = TimeFrame,
    slot_per_100c = Data
  ) |> 
  mutate(
    slot_per_100c = as.numeric(slot_per_100c)
  )

ggplot(ks_t_head |> filter(Location == "Kansas"), aes(x = year, y = slot_per_100c)) +
  geom_col()

Trend - Early Head Start

Early Head Start is the number of Early Head Start slots available per 100 children from birth through 3 years of age living in families with incomes below the U.S. poverty threshold.

Code
ks_t_early_head <- read_xlsx("./Data/datacenter/KS Early Head Start.xlsx")

ks_t_early_head <- ks_t_early_head |> 
  rename(
    year = TimeFrame,
    slot_per_100c = Data
  ) |> 
  mutate(
    slot_per_100c = as.numeric(slot_per_100c)
  )

ggplot(ks_t_early_head |> filter(Location == "Kansas"), aes(x = year, y = slot_per_100c)) +
  geom_col()

Interactive Map Plot

County level - child care centers

map counties with 0 child care centers - along with the child population who needs childcare centers

Code
#county census data - N of children
county_census_cleaned <- read_rds("./Data/cleaned_ks/county_census_clened.rds")

#county care facility 2022 - 2024 number of facility
county_care_facility_long <- read_rds("./Data/cleaned_ks/county_care_facility_year.rds")

county_care_2024 <- county_care_facility_long |> 
  filter(
    year == 2024
  )

#kansas county child care centers data for 2024 
ks_county_cc <- county_facility |> 
  filter(
    care_facility == "child care centers"
  ) |> 
  left_join(county_census_cleaned, by = "county") |> 
  left_join(county_care_2024 |> dplyr::select(county:child_care), by = "county") |> 
  mutate(
    county = str_to_title(county),
    name = county
  ) |> 
  select(-county)

#kansas county geojson file import
ks_county_json <- geojsonio::geojson_read("./Data/kansas-with-county-boundaries_1099.geojson", what = "sp")


#merge data
ks_county_map <- sp::merge(ks_county_json, ks_county_cc, by.x = "name", by.y = "name", duplicateGeoms = TRUE)
Code
m <- leaflet(ks_county_map) |> 
    addProviderTiles(
    "MapBox", 
    options = providerTileOptions(
      id = "mapbox.light",
      accessToken = Sys.getenv('MAPBOX_ACCESS_TOKEN')))


bins <- c(0, 1, 5, 10, 50, 100, 200)

pals <- colorBin(
  "YlOrRd", 
  domain = ks_county_map$child_care, 
  bins = bins,
  reverse = TRUE,
  pretty = TRUE
  )


labels <- sprintf(
  "<strong>%s</strong><br/>%g childcare <br/>N of children who needs childcare: %g <br/>N of children enrolled in childcare: %g <br/> N of desired capacity: %g <br/> N of licensed capacity: %g",
  ks_county_map$name, ks_county_map$child_care, ks_county_map$n_child_under_6_lab, ks_county_map$n_child_enrolled, ks_county_map$n_desired_cap, ks_county_map$n_licensed_cap
) %>% lapply(htmltools::HTML)



m <- m |> addPolygons(
  fillColor = ~pals(child_care),
  weight = 2, 
  opacity = 1,
  color = "white",
  dashArray = "3",
  fillOpacity = 0.7,
  highlightOptions = highlightOptions(
    weight = 5,
    color = "#666",
    dashArray = "",
    fillOpacity = 0.7,
    bringToFront = TRUE),
  label = labels,
  labelOptions = labelOptions(
    style = list("font-weight" = "normal", padding = "3px 8px"),
    textsize = "15px",
    direction = "auto"
  )
)

m |> addLegend(
  pal = pals,
  values = ~child_care,
  title = "N of childcare center",
  opacity = 0.7,
  position = "bottomright"
)

County level - family care centers

Code
#kansas county family care centers data for 2024 
ks_county_fc <- county_facility |> 
  filter(
    care_facility == "licesned family/group"
  ) |> 
  left_join(county_census_cleaned, by = "county") |> 
  left_join(county_care_2024 |> dplyr::select(county, year, licesned_family), by = "county") |> 
  mutate(
    county = str_to_title(county),
    name = county
  ) |> 
  select(-county)

#merge data
ks_county_map_fc <- sp::merge(ks_county_json, ks_county_fc, by.x = "name", by.y = "name", duplicateGeoms = TRUE)
Code
m2 <- leaflet(ks_county_map_fc) |> 
    addProviderTiles(
    "MapBox", 
    options = providerTileOptions(
      id = "mapbox.light",
      accessToken = Sys.getenv('MAPBOX_ACCESS_TOKEN')))


bins_fc <- c(0, 10, 50, 100, 250, 500)

pals_fc <- colorBin(
  "YlOrRd", 
  domain = ks_county_map_fc$licesned_family, 
  bins = bins_fc, 
  reverse = TRUE,
  pretty = TRUE,
  na.color = "gray"
  )


labels <- sprintf(
  "<strong>%s</strong><br/>%g family care center <br/>N of children who needs childcare: %g <br/>N of children enrolled in family care: %g <br/> N of desired capacity: %g <br/> N of licensed capacity: %g",
  ks_county_map_fc$name, ks_county_map_fc$licesned_family, ks_county_map_fc$n_child_under_6_lab, ks_county_map_fc$n_child_enrolled, ks_county_map_fc$n_desired_cap, ks_county_map_fc$n_licensed_cap
) %>% lapply(htmltools::HTML)



m2 <- m2 |> addPolygons(
  fillColor = ~pals_fc(licesned_family),
  weight = 2, 
  opacity = 1,
  color = "white",
  dashArray = "3",
  fillOpacity = 0.7,
  highlightOptions = highlightOptions(
    weight = 5,
    color = "#666",
    dashArray = "",
    fillOpacity = 0.7,
    bringToFront = TRUE),
  label = labels,
  labelOptions = labelOptions(
    style = list("font-weight" = "normal", padding = "3px 8px"),
    textsize = "15px",
    direction = "auto"
  )
)

m2 |> addLegend(
  pal = pals_fc,
  values = ~licesned_family,
  title = "N of family care center",
  opacity = 0.7,
  position = "bottomright"
)

2022 - N of childcare centers per 1000 child pop

Childcare centers per 1000 child population.

1 indicates 1 child care centers per 1000 child in a county

Code
county_care_2022 <- county_care_facility_long |> 
  filter(
    year == 2022
  )

#kansas county child care centers data for 2024 
ks_county_22 <- county_facility |> 
  filter(
    care_facility == "child care centers"
  ) |> 
  left_join(county_census_cleaned, by = "county") |> 
  left_join(county_care_2022 |> dplyr::select(county:child_care), by = "county") |> 
  mutate(
    county = str_to_title(county),
    name = county,
    child_care_per_thou = round((child_care/n_residents_under_6)*1000, digits = 2)
  ) |> 
  select(-county)

#kansas county geojson file import
ks_county_json <- geojsonio::geojson_read("./Data/kansas-with-county-boundaries_1099.geojson", what = "sp")


#merge data
ks_county_map_22 <- sp::merge(ks_county_json, ks_county_22, by.x = "name", by.y = "name", duplicateGeoms = TRUE)
Code
m_22 <- leaflet(ks_county_map_22) |> 
    addProviderTiles(
    "MapBox", 
    options = providerTileOptions(
      id = "mapbox.light",
      accessToken = Sys.getenv('MAPBOX_ACCESS_TOKEN')))


bins <- c(0, 1, 2, 3, 4, 5, 10)

pals <- colorBin(
  "YlOrRd", 
  domain = ks_county_map_22$child_care_per_thou, 
  bins = bins,
  reverse = TRUE,
  pretty = TRUE
  )


labels <- sprintf(
  "<strong>%s</strong><br/>%g childcare per 1000 child pop in 2022 <br/>N of children who needs childcare: %g <br/>Child Population: %g <br/> N of childcare centers: %g",
  ks_county_map_22$name, ks_county_map_22$child_care_per_thou, ks_county_map_22$n_child_under_6_lab, ks_county_map_22$n_residents_under_6, ks_county_map_22$child_care
) %>% lapply(htmltools::HTML)



m_22 <- m_22 |> addPolygons(
  fillColor = ~pals(child_care_per_thou),
  weight = 2, 
  opacity = 1,
  color = "white",
  dashArray = "3",
  fillOpacity = 0.7,
  highlightOptions = highlightOptions(
    weight = 5,
    color = "#666",
    dashArray = "",
    fillOpacity = 0.7,
    bringToFront = TRUE),
  label = labels,
  labelOptions = labelOptions(
    style = list("font-weight" = "normal", padding = "3px 8px"),
    textsize = "15px",
    direction = "auto"
  )
)

m_22 |> addLegend(
  pal = pals,
  values = ~child_care,
  title = "N of childcare center per 1000 child pop - 2022",
  opacity = 0.7,
  position = "bottomright"
)